/*
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
package org.xwiki.officeimporter.internal.cleaner;

import java.io.StringReader;

import org.junit.jupiter.api.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xwiki.test.junit5.mockito.ComponentTest;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Tests how {@link OfficeHTMLCleaner} cleans HTML links ({@code <a/>} elements).
 *
 * @version $Id$
 * @since 1.8
 */
@ComponentTest
public class LinkOfficeCleaningTest extends AbstractHTMLCleaningTest
{
    /**
     * The HTML generated by the open office server includes anchors of the form:
     *
     * <pre>
     * {@code <a name="table1">
     *   <h1>Sheet 1: <em>Hello</em></h1>
     * </a>}
     * </pre>
     *
     * which the default HTML cleaner converts to:
     *
     * <pre>
     * {@code <a name="table1"/>
     * <h1>
     *   <a name="table1">Sheet 1: <em>Hello</em></a>
     * </h1>}
     * </pre>
     *
     * This is caused by the close-before-copy-inside behavior of the default HTML cleaner, so the additional
     * (copy-inside) anchor needs to be ripped off.
     * <p>
     * The input places such a duplicated anchor between two regular links. The test expects three {@code a}
     * elements in the cleaned document, and the second one returned to have a {@code p} element as its parent.
     */
    @Test
    public void duplicateAnchorRemoving()
    {
        String surroundingLink = "<div><a href=\"www.xwiki.org\">xwiki</a></div>";
        String duplicatedAnchor = "<a name=\"table1\"/><h1><a name=\"table1\">Sheet 1: <em>Hello</em></a></h1>";

        NodeList anchors = cleanAndListAnchors(surroundingLink + duplicatedAnchor + surroundingLink);

        assertEquals(3, anchors.getLength());
        Element anchorParent = (Element) anchors.item(1).getParentNode();
        assertEquals("p", anchorParent.getNodeName());
    }

    /**
     * Test duplicate anchor filtering with TOC structures, see https://jira.xwiki.org/browse/XWIKI-3415
     * <p>
     * A heading containing a single named anchor is cleaned and exactly one {@code a} element is expected in the
     * resulting document.
     */
    @Test
    public void anchorFilteringWithTOC()
    {
        NodeList anchors = cleanAndListAnchors("<div>some text<h1><a name=\"Topic1\"/>Topic1</h1></div>");

        assertEquals(1, anchors.getLength());
    }

    /**
     * Wraps the given markup between the inherited {@code header} and {@code footer}, runs the result through
     * {@code officeHTMLCleaner} and collects the {@code a} elements of the cleaned document.
     *
     * @param body the HTML fragment to place between the header and the footer
     * @return the {@code a} elements found in the cleaned document
     */
    private NodeList cleanAndListAnchors(String body)
    {
        Document cleanedDocument = officeHTMLCleaner.clean(new StringReader(header + body + footer));
        return cleanedDocument.getElementsByTagName("a");
    }
}
